{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Requirement already satisfied: requests-html in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (0.10.0)\n",
      "Requirement already satisfied: pyquery in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (2.0.0)\n",
      "Requirement already satisfied: parse in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (1.19.1)\n",
      "Requirement already satisfied: requests in d:\\anaconda\\lib\\site-packages (from requests-html) (2.27.1)\n",
      "Requirement already satisfied: fake-useragent in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (1.2.1)\n",
      "Requirement already satisfied: bs4 in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (0.0.1)\n",
      "Requirement already satisfied: w3lib in d:\\anaconda\\lib\\site-packages (from requests-html) (1.21.0)\n",
      "Requirement already satisfied: pyppeteer>=0.0.14 in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from requests-html) (1.0.2)\n",
      "Requirement already satisfied: websockets<11.0,>=10.0 in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from pyppeteer>=0.0.14->requests-html) (10.4)\n",
      "Requirement already satisfied: certifi>=2021 in d:\\anaconda\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (2021.10.8)\n",
      "Requirement already satisfied: importlib-metadata>=1.4 in d:\\anaconda\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (4.11.3)\n",
      "Requirement already satisfied: pyee<9.0.0,>=8.1.0 in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from pyppeteer>=0.0.14->requests-html) (8.2.2)\n",
      "Requirement already satisfied: appdirs<2.0.0,>=1.4.3 in d:\\anaconda\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (1.4.4)\n",
      "Requirement already satisfied: urllib3<2.0.0,>=1.25.8 in d:\\anaconda\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (1.26.9)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.42.1 in d:\\anaconda\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (4.64.0)\n",
      "Requirement already satisfied: zipp>=0.5 in d:\\anaconda\\lib\\site-packages (from importlib-metadata>=1.4->pyppeteer>=0.0.14->requests-html) (3.7.0)\n",
      "Requirement already satisfied: colorama in d:\\anaconda\\lib\\site-packages (from tqdm<5.0.0,>=4.42.1->pyppeteer>=0.0.14->requests-html) (0.4.4)\n",
      "Requirement already satisfied: beautifulsoup4 in d:\\anaconda\\lib\\site-packages (from bs4->requests-html) (4.11.1)\n",
      "Requirement already satisfied: soupsieve>1.2 in d:\\anaconda\\lib\\site-packages (from beautifulsoup4->bs4->requests-html) (2.3.1)\n",
      "Requirement already satisfied: importlib-resources>=5.0 in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from fake-useragent->requests-html) (6.0.1)\n",
      "Requirement already satisfied: cssselect>=1.2.0 in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from pyquery->requests-html) (1.2.0)\n",
      "Requirement already satisfied: lxml>=2.1 in d:\\anaconda\\lib\\site-packages (from pyquery->requests-html) (4.8.0)\n",
      "Requirement already satisfied: charset-normalizer~=2.0.0 in d:\\anaconda\\lib\\site-packages (from requests->requests-html) (2.0.4)\n",
      "Requirement already satisfied: idna<4,>=2.5 in d:\\anaconda\\lib\\site-packages (from requests->requests-html) (3.3)\n",
      "Requirement already satisfied: six>=1.4.1 in d:\\anaconda\\lib\\site-packages (from w3lib->requests-html) (1.16.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install requests-html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录cookie XSRF-TOKEN=zIARnRM0QQaLudJk_4OBXw; __gc_id=e936feed5bd343e5a73b304fa0d4ad5f; _ga=GA1.1.504776059.1697021947; __uuid=1697021948310.51; __tlog=1697021948354.48%7C00000000%7C00000000%7C00000000%7C00000000; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1697021951; acw_tc=2760828416970219810274366e53a098388abcfc886c89f5cbf2332344893f; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1697021975; __session_seq=4; __uv_seq=4; __tlg_event_seq=24; _ga_54YTJKWN86=GS1.1.1697021946.1.1.1697023027.0.0.0\n",
      "这是第1页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待9秒...然后以继续抓取\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataInfo</th>\n",
       "      <th>dataParams</th>\n",
       "      <th>job.labels</th>\n",
       "      <th>job.link</th>\n",
       "      <th>job.title</th>\n",
       "      <th>job.dq</th>\n",
       "      <th>job.refreshTime</th>\n",
       "      <th>job.jobKind</th>\n",
       "      <th>job.topJob</th>\n",
       "      <th>job.jobId</th>\n",
       "      <th>...</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>comp.compLogo</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>job.pcOuterLink</th>\n",
       "      <th>job.h5OuterLink</th>\n",
       "      <th>comp.compId</th>\n",
       "      <th>job.campusJobKind</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...</td>\n",
       "      <td>{\"userId\":\"181b5eaac20859c201c724cba006438e\",\"...</td>\n",
       "      <td>[广告策划, 新媒体]</td>\n",
       "      <td>https://www.liepin.com/a/49462105.shtml</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>深圳</td>\n",
       "      <td>20230916093155</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>49462105</td>\n",
       "      <td>...</td>\n",
       "      <td></td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>云计算/大数据</td>\n",
       "      <td>某深圳云计算/大数据公司</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>%7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...</td>\n",
       "      <td>{\"userId\":\"deb548cc686cceb03ceb98aa3a938841\",\"...</td>\n",
       "      <td>[广告策划]</td>\n",
       "      <td>https://www.liepin.com/a/50016611.shtml</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>深圳-福田区</td>\n",
       "      <td>20231102191952</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>50016611</td>\n",
       "      <td>...</td>\n",
       "      <td></td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>互联网</td>\n",
       "      <td>某深圳互联网公司</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>%7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...</td>\n",
       "      <td>{\"userId\":\"a7113020c23e14d83ae654ff5fc70253\",\"...</td>\n",
       "      <td>[广告策划, 营销策划, 新媒体, 新媒体策划, 广告/传媒/文化]</td>\n",
       "      <td>https://www.liepin.com/a/49511733.shtml</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>深圳</td>\n",
       "      <td>20230919093804</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>49511733</td>\n",
       "      <td>...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>咨询服务</td>\n",
       "      <td>某深圳咨询服务公司</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>%7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...</td>\n",
       "      <td>{\"userId\":\"7061eb526ed90d1a434ea43a8694af3c\",\"...</td>\n",
       "      <td>[]</td>\n",
       "      <td>https://www.liepin.com/a/49454013.shtml</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>深圳-福田区</td>\n",
       "      <td>20230915160702</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>49454013</td>\n",
       "      <td>...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>咨询服务</td>\n",
       "      <td>某深圳咨询服务公司</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>%7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...</td>\n",
       "      <td>{\"userId\":\"902e867c150880b14dfa1eb12beb943c\",\"...</td>\n",
       "      <td>[]</td>\n",
       "      <td>https://www.liepin.com/a/49570391.shtml</td>\n",
       "      <td>广告策划</td>\n",
       "      <td>深圳</td>\n",
       "      <td>20231014200408</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>49570391</td>\n",
       "      <td>...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>60e2fe0bf3df194a3c48adb502u.png</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>咨询服务</td>\n",
       "      <td>某深圳咨询服务公司</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...</td>\n",
       "      <td>{\"jobKind\":\"6\",\"jobId\":\"62579301\",\"userId\":\"d4...</td>\n",
       "      <td>[3个月, 提供转正, 本科, 市场营销, 广告策划, 市场策划, 品牌推广, 活动策划, ...</td>\n",
       "      <td>https://www.liepin.com/lptjob/62579301</td>\n",
       "      <td>海外市场实习生(J11029)</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>20231031114858</td>\n",
       "      <td>6</td>\n",
       "      <td>False</td>\n",
       "      <td>62579301</td>\n",
       "      <td>...</td>\n",
       "      <td>https://www.liepin.com/company/9322029/</td>\n",
       "      <td>沪深A股上市</td>\n",
       "      <td>5d06ef909fae687d4390e99b07u.png</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>智能硬件</td>\n",
       "      <td>石头科技</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>9322029.0</td>\n",
       "      <td>实习</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...</td>\n",
       "      <td>{\"jobKind\":\"6\",\"jobId\":\"62520763\",\"userId\":\"c1...</td>\n",
       "      <td>[3个月, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, 广告媒介, ...</td>\n",
       "      <td>https://www.liepin.com/lptjob/62520763</td>\n",
       "      <td>海外市场实习生(J11025)</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>20231027151058</td>\n",
       "      <td>6</td>\n",
       "      <td>False</td>\n",
       "      <td>62520763</td>\n",
       "      <td>...</td>\n",
       "      <td>https://www.liepin.com/company/9322029/</td>\n",
       "      <td>沪深A股上市</td>\n",
       "      <td>5d06ef909fae687d4390e99b07u.png</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>智能硬件</td>\n",
       "      <td>石头科技</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>9322029.0</td>\n",
       "      <td>实习</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...</td>\n",
       "      <td>{\"jobKind\":\"6\",\"jobId\":\"62438217\",\"userId\":\"d4...</td>\n",
       "      <td>[3个月, 提供转正, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, ...</td>\n",
       "      <td>https://www.liepin.com/lptjob/62438217</td>\n",
       "      <td>海外市场实习生(J11025)</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>20231024163614</td>\n",
       "      <td>6</td>\n",
       "      <td>False</td>\n",
       "      <td>62438217</td>\n",
       "      <td>...</td>\n",
       "      <td>https://www.liepin.com/company/9322029/</td>\n",
       "      <td>沪深A股上市</td>\n",
       "      <td>5d06ef909fae687d4390e99b07u.png</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>智能硬件</td>\n",
       "      <td>石头科技</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>9322029.0</td>\n",
       "      <td>实习</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...</td>\n",
       "      <td>{\"jobKind\":\"6\",\"jobId\":\"61734531\",\"userId\":\"bc...</td>\n",
       "      <td>[本科, 市场营销]</td>\n",
       "      <td>https://www.liepin.com/lptjob/61734531</td>\n",
       "      <td>数字化市场运营岗</td>\n",
       "      <td>深圳</td>\n",
       "      <td>20230918224323</td>\n",
       "      <td>6</td>\n",
       "      <td>False</td>\n",
       "      <td>61734531</td>\n",
       "      <td>...</td>\n",
       "      <td>https://www.liepin.com/company/2034027/</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5bfea5f974719d2aa34cef7003a.png</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>运营商/增值服务</td>\n",
       "      <td>中国联通广东省分公司</td>\n",
       "      <td>https://atsc.liepin.com/ats/apply-form/?jobId=...</td>\n",
       "      <td>https://matsc.liepin.com/ats/apply-form/?jobId...</td>\n",
       "      <td>2034027.0</td>\n",
       "      <td>应届</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...</td>\n",
       "      <td>{\"jobKind\":\"6\",\"jobId\":\"61666977\",\"userId\":\"f6...</td>\n",
       "      <td>[本科, 品牌推广, 品牌运营, 新媒体, 国际品牌, 公众号, 广告/传媒/文化, 教育培训]</td>\n",
       "      <td>https://www.liepin.com/lptjob/61666977</td>\n",
       "      <td>品牌岗（新媒体及文案方向） (24届应届生)</td>\n",
       "      <td>深圳-坪山区</td>\n",
       "      <td>20231113143544</td>\n",
       "      <td>6</td>\n",
       "      <td>False</td>\n",
       "      <td>61666977</td>\n",
       "      <td>...</td>\n",
       "      <td>https://www.liepin.com/company/13261319/</td>\n",
       "      <td>NaN</td>\n",
       "      <td>63b40735895ec0179bd60f2507u.png</td>\n",
       "      <td></td>\n",
       "      <td>学校教育</td>\n",
       "      <td>深圳市华朗学校</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>13261319.0</td>\n",
       "      <td>应届</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>400 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             dataInfo  \\\n",
       "0   %7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...   \n",
       "1   %7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...   \n",
       "2   %7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...   \n",
       "3   %7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...   \n",
       "4   %7B%22skId%22%3A%22geq5b0qsi9umzbhn0oojbi2j8v8...   \n",
       "..                                                ...   \n",
       "35  %7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...   \n",
       "36  %7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...   \n",
       "37  %7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...   \n",
       "38  %7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...   \n",
       "39  %7B%22jobKind%22%3A%226%22%2C%22jobId%22%3A%22...   \n",
       "\n",
       "                                           dataParams  \\\n",
       "0   {\"userId\":\"181b5eaac20859c201c724cba006438e\",\"...   \n",
       "1   {\"userId\":\"deb548cc686cceb03ceb98aa3a938841\",\"...   \n",
       "2   {\"userId\":\"a7113020c23e14d83ae654ff5fc70253\",\"...   \n",
       "3   {\"userId\":\"7061eb526ed90d1a434ea43a8694af3c\",\"...   \n",
       "4   {\"userId\":\"902e867c150880b14dfa1eb12beb943c\",\"...   \n",
       "..                                                ...   \n",
       "35  {\"jobKind\":\"6\",\"jobId\":\"62579301\",\"userId\":\"d4...   \n",
       "36  {\"jobKind\":\"6\",\"jobId\":\"62520763\",\"userId\":\"c1...   \n",
       "37  {\"jobKind\":\"6\",\"jobId\":\"62438217\",\"userId\":\"d4...   \n",
       "38  {\"jobKind\":\"6\",\"jobId\":\"61734531\",\"userId\":\"bc...   \n",
       "39  {\"jobKind\":\"6\",\"jobId\":\"61666977\",\"userId\":\"f6...   \n",
       "\n",
       "                                           job.labels  \\\n",
       "0                                         [广告策划, 新媒体]   \n",
       "1                                              [广告策划]   \n",
       "2                  [广告策划, 营销策划, 新媒体, 新媒体策划, 广告/传媒/文化]   \n",
       "3                                                  []   \n",
       "4                                                  []   \n",
       "..                                                ...   \n",
       "35  [3个月, 提供转正, 本科, 市场营销, 广告策划, 市场策划, 品牌推广, 活动策划, ...   \n",
       "36  [3个月, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, 广告媒介, ...   \n",
       "37  [3个月, 提供转正, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, ...   \n",
       "38                                         [本科, 市场营销]   \n",
       "39   [本科, 品牌推广, 品牌运营, 新媒体, 国际品牌, 公众号, 广告/传媒/文化, 教育培训]   \n",
       "\n",
       "                                   job.link               job.title  job.dq  \\\n",
       "0   https://www.liepin.com/a/49462105.shtml                    广告策划      深圳   \n",
       "1   https://www.liepin.com/a/50016611.shtml                    广告策划  深圳-福田区   \n",
       "2   https://www.liepin.com/a/49511733.shtml                    广告策划      深圳   \n",
       "3   https://www.liepin.com/a/49454013.shtml                    广告策划  深圳-福田区   \n",
       "4   https://www.liepin.com/a/49570391.shtml                    广告策划      深圳   \n",
       "..                                      ...                     ...     ...   \n",
       "35   https://www.liepin.com/lptjob/62579301         海外市场实习生(J11029)  深圳-南山区   \n",
       "36   https://www.liepin.com/lptjob/62520763         海外市场实习生(J11025)  深圳-南山区   \n",
       "37   https://www.liepin.com/lptjob/62438217         海外市场实习生(J11025)  深圳-南山区   \n",
       "38   https://www.liepin.com/lptjob/61734531                数字化市场运营岗      深圳   \n",
       "39   https://www.liepin.com/lptjob/61666977  品牌岗（新媒体及文案方向） (24届应届生)  深圳-坪山区   \n",
       "\n",
       "   job.refreshTime job.jobKind  job.topJob job.jobId  ...  \\\n",
       "0   20230916093155           1       False  49462105  ...   \n",
       "1   20231102191952           1       False  50016611  ...   \n",
       "2   20230919093804           1       False  49511733  ...   \n",
       "3   20230915160702           1       False  49454013  ...   \n",
       "4   20231014200408           1       False  49570391  ...   \n",
       "..             ...         ...         ...       ...  ...   \n",
       "35  20231031114858           6       False  62579301  ...   \n",
       "36  20231027151058           6       False  62520763  ...   \n",
       "37  20231024163614           6       False  62438217  ...   \n",
       "38  20230918224323           6       False  61734531  ...   \n",
       "39  20231113143544           6       False  61666977  ...   \n",
       "\n",
       "                                   comp.link comp.compStage  \\\n",
       "0                                                     融资未公开   \n",
       "1                                                     不需要融资   \n",
       "2                                                       NaN   \n",
       "3                                                       NaN   \n",
       "4                                                       NaN   \n",
       "..                                       ...            ...   \n",
       "35   https://www.liepin.com/company/9322029/         沪深A股上市   \n",
       "36   https://www.liepin.com/company/9322029/         沪深A股上市   \n",
       "37   https://www.liepin.com/company/9322029/         沪深A股上市   \n",
       "38   https://www.liepin.com/company/2034027/            NaN   \n",
       "39  https://www.liepin.com/company/13261319/            NaN   \n",
       "\n",
       "                      comp.compLogo comp.compScale comp.compIndustry  \\\n",
       "0   60e2fe0bf3df194a3c48adb502u.png       100-499人           云计算/大数据   \n",
       "1   60e2fe0bf3df194a3c48adb502u.png          1-49人               互联网   \n",
       "2   60e2fe0bf3df194a3c48adb502u.png         50-99人              咨询服务   \n",
       "3   60e2fe0bf3df194a3c48adb502u.png         50-99人              咨询服务   \n",
       "4   60e2fe0bf3df194a3c48adb502u.png         50-99人              咨询服务   \n",
       "..                              ...            ...               ...   \n",
       "35  5d06ef909fae687d4390e99b07u.png     1000-2000人              智能硬件   \n",
       "36  5d06ef909fae687d4390e99b07u.png     1000-2000人              智能硬件   \n",
       "37  5d06ef909fae687d4390e99b07u.png     1000-2000人              智能硬件   \n",
       "38  5bfea5f974719d2aa34cef7003a.png       10000人以上          运营商/增值服务   \n",
       "39  63b40735895ec0179bd60f2507u.png                             学校教育   \n",
       "\n",
       "   comp.compName                                    job.pcOuterLink  \\\n",
       "0   某深圳云计算/大数据公司                                                NaN   \n",
       "1       某深圳互联网公司                                                NaN   \n",
       "2      某深圳咨询服务公司                                                NaN   \n",
       "3      某深圳咨询服务公司                                                NaN   \n",
       "4      某深圳咨询服务公司                                                NaN   \n",
       "..           ...                                                ...   \n",
       "35          石头科技                                                      \n",
       "36          石头科技                                                      \n",
       "37          石头科技                                                      \n",
       "38    中国联通广东省分公司  https://atsc.liepin.com/ats/apply-form/?jobId=...   \n",
       "39       深圳市华朗学校                                                      \n",
       "\n",
       "                                      job.h5OuterLink comp.compId  \\\n",
       "0                                                 NaN         NaN   \n",
       "1                                                 NaN         NaN   \n",
       "2                                                 NaN         NaN   \n",
       "3                                                 NaN         NaN   \n",
       "4                                                 NaN         NaN   \n",
       "..                                                ...         ...   \n",
       "35                                                      9322029.0   \n",
       "36                                                      9322029.0   \n",
       "37                                                      9322029.0   \n",
       "38  https://matsc.liepin.com/ats/apply-form/?jobId...   2034027.0   \n",
       "39                                                     13261319.0   \n",
       "\n",
       "    job.campusJobKind  \n",
       "0                 NaN  \n",
       "1                 NaN  \n",
       "2                 NaN  \n",
       "3                 NaN  \n",
       "4                 NaN  \n",
       "..                ...  \n",
       "35                 实习  \n",
       "36                 实习  \n",
       "37                 实习  \n",
       "38                 应届  \n",
       "39                 应届  \n",
       "\n",
       "[400 rows x 32 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 猎聘 没有设置登陆cookie，仅可抓取10页\n",
    "import crawl_liepin\n",
    "\n",
    "crawl_liepin.crawl(城市=\"深圳\",关键词=\"广告策划\",学历='本科',工作经验='应届生')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. cookies实现登陆，获取完整数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'inited_user=daf7251f92024e8969feb28b0e9ad34c; XSRF-TOKEN=SIVa3Y_gRaqR7k-mI1I6kQ; __gc_id=d474cd1529ae40a29b8f9c81b7e60de7; _ga=GA1.1.1301373590.1698838360; __uuid=1698838362686.73; __tlog=1698838362732.34%7C00000000%7C00000000%7C00000000%7C00000000; acw_tc=276077be16988383924452585e8d8e297846bb9fcc8c8f34695b420d154b08; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1698838365; UniqueKey=95507c72a8d5ae141a667e00ad0d9493; liepin_login_valid=0; lt_auth=u%2B5bbHQGxlzxtXfR3zQN4vociI39UWvIpX8EhE0Ahoe%2BCqG04PngSwOGq7EExAMhlkh1ccULN7n2Pev2zXtP4kcTwGqnl4CyvOW92GECTeNcN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E8%AE%B8%E6%99%BA%E8%B6%85; need_bind_tel=false; new_user=false; c_flag=fa43f4d55f3df63a96a7b4f194e214d4; inited_user=daf7251f92024e8969feb28b0e9ad34c; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1698838461; imId=c5f9b89f8466dffe6882ca1e5431db9c; imId_0=c5f9b89f8466dffe6882ca1e5431db9c; imClientId=c5f9b89f8466dffeb1921abcfab3aed0; imClientId_0=c5f9b89f8466dffeb1921abcfab3aed0; imApp_0=1; __session_seq=7; __uv_seq=7; fe_im_socketSequence_new_0=1_1_1; __tlg_event_seq=116; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_95507c72a8d5ae141a667e00ad0d9493%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1698838360.1.1.1698838528.0.0.0'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 建立登陆cookie\n",
    "cookie = \"inited_user=daf7251f92024e8969feb28b0e9ad34c; XSRF-TOKEN=SIVa3Y_gRaqR7k-mI1I6kQ; __gc_id=d474cd1529ae40a29b8f9c81b7e60de7; _ga=GA1.1.1301373590.1698838360; __uuid=1698838362686.73; __tlog=1698838362732.34%7C00000000%7C00000000%7C00000000%7C00000000; acw_tc=276077be16988383924452585e8d8e297846bb9fcc8c8f34695b420d154b08; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1698838365; UniqueKey=95507c72a8d5ae141a667e00ad0d9493; liepin_login_valid=0; lt_auth=u%2B5bbHQGxlzxtXfR3zQN4vociI39UWvIpX8EhE0Ahoe%2BCqG04PngSwOGq7EExAMhlkh1ccULN7n2Pev2zXtP4kcTwGqnl4CyvOW92GECTeNcN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E8%AE%B8%E6%99%BA%E8%B6%85; need_bind_tel=false; new_user=false; c_flag=fa43f4d55f3df63a96a7b4f194e214d4; inited_user=daf7251f92024e8969feb28b0e9ad34c; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1698838461; imId=c5f9b89f8466dffe6882ca1e5431db9c; imId_0=c5f9b89f8466dffe6882ca1e5431db9c; imClientId=c5f9b89f8466dffeb1921abcfab3aed0; imClientId_0=c5f9b89f8466dffeb1921abcfab3aed0; imApp_0=1; __session_seq=7; __uv_seq=7; fe_im_socketSequence_new_0=1_1_1; __tlg_event_seq=116; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_95507c72a8d5ae141a667e00ad0d9493%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1698838360.1.1.1698838528.0.0.0\"\n",
    "cookie"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录cookie inited_user=daf7251f92024e8969feb28b0e9ad34c; XSRF-TOKEN=SIVa3Y_gRaqR7k-mI1I6kQ; __gc_id=d474cd1529ae40a29b8f9c81b7e60de7; _ga=GA1.1.1301373590.1698838360; __uuid=1698838362686.73; __tlog=1698838362732.34%7C00000000%7C00000000%7C00000000%7C00000000; acw_tc=276077be16988383924452585e8d8e297846bb9fcc8c8f34695b420d154b08; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1698838365; UniqueKey=95507c72a8d5ae141a667e00ad0d9493; liepin_login_valid=0; lt_auth=u%2B5bbHQGxlzxtXfR3zQN4vociI39UWvIpX8EhE0Ahoe%2BCqG04PngSwOGq7EExAMhlkh1ccULN7n2Pev2zXtP4kcTwGqnl4CyvOW92GECTeNcN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E8%AE%B8%E6%99%BA%E8%B6%85; need_bind_tel=false; new_user=false; c_flag=fa43f4d55f3df63a96a7b4f194e214d4; inited_user=daf7251f92024e8969feb28b0e9ad34c; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1698838461; imId=c5f9b89f8466dffe6882ca1e5431db9c; imId_0=c5f9b89f8466dffe6882ca1e5431db9c; imClientId=c5f9b89f8466dffeb1921abcfab3aed0; imClientId_0=c5f9b89f8466dffeb1921abcfab3aed0; imApp_0=1; __session_seq=7; __uv_seq=7; fe_im_socketSequence_new_0=1_1_1; __tlg_event_seq=116; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_95507c72a8d5ae141a667e00ad0d9493%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1698838360.1.1.1698838528.0.0.0\n",
      "这是第1页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待4秒...然后以继续抓取\n"
     ]
    }
   ],
   "source": [
    "import crawl_liepin\n",
    "\n",
    "广告策划_深圳_results = crawl_liepin.crawl(城市=\"深圳\",关键词=\"广告策划\",学历='本科',工作经验='',登录cookie=cookie)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. 使用数据分析进行数据交互式可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method DataFrame.to_html of                                              dataInfo  \\\n",
       "0   %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "1   %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "2   %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "3   %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "4   %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "..                                                ...   \n",
       "35  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "36  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "37  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "38  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "39  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "\n",
       "                                           dataParams  \\\n",
       "0   {\"jobId\":\"49462105\",\"imId\":\"3f588040b06a7b314f...   \n",
       "1   {\"jobId\":\"50016611\",\"imId\":\"98379e69c2eee69d99...   \n",
       "2   {\"jobId\":\"49511733\",\"imId\":\"ab85de58bc41b6079d...   \n",
       "3   {\"jobId\":\"49454013\",\"imId\":\"2e7803cd11f19d9be4...   \n",
       "4   {\"jobId\":\"49570391\",\"imId\":\"f8f6775be4aaadf9b5...   \n",
       "..                                                ...   \n",
       "35  {\"recruiterPhoto\":\"617aa8473dba297879e635a302u...   \n",
       "36  {\"recruiterPhoto\":\"5f8f9866dfb13a7dee342f1808u...   \n",
       "37  {\"recruiterPhoto\":\"617aa8473dba297879e635a302u...   \n",
       "38  {\"recruiterPhoto\":\"5f8f9868f6d1ab58476f24a008u...   \n",
       "39  {\"recruiterPhoto\":\"5f8f986779c7cc70efbf36c008u...   \n",
       "\n",
       "                                           job.labels  \\\n",
       "0                                         [广告策划, 新媒体]   \n",
       "1                                              [广告策划]   \n",
       "2                  [广告策划, 营销策划, 新媒体, 新媒体策划, 广告/传媒/文化]   \n",
       "3                                                  []   \n",
       "4                                                  []   \n",
       "..                                                ...   \n",
       "35  [3个月, 提供转正, 本科, 市场营销, 广告策划, 市场策划, 品牌推广, 活动策划, ...   \n",
       "36  [3个月, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, 广告媒介, ...   \n",
       "37  [3个月, 提供转正, 本科, 市场营销, 活动策划, 广告策划, 市场策划, 品牌推广, ...   \n",
       "38                                         [本科, 市场营销]   \n",
       "39   [本科, 品牌推广, 品牌运营, 新媒体, 国际品牌, 公众号, 广告/传媒/文化, 教育培训]   \n",
       "\n",
       "                                   job.link job.jobId  job.advViewFlag  \\\n",
       "0   https://www.liepin.com/a/49462105.shtml  49462105            False   \n",
       "1   https://www.liepin.com/a/50016611.shtml  50016611            False   \n",
       "2   https://www.liepin.com/a/49511733.shtml  49511733            False   \n",
       "3   https://www.liepin.com/a/49454013.shtml  49454013            False   \n",
       "4   https://www.liepin.com/a/49570391.shtml  49570391            False   \n",
       "..                                      ...       ...              ...   \n",
       "35   https://www.liepin.com/lptjob/62579301  62579301            False   \n",
       "36   https://www.liepin.com/lptjob/62520763  62520763            False   \n",
       "37   https://www.liepin.com/lptjob/62438217  62438217            False   \n",
       "38   https://www.liepin.com/lptjob/61734531  61734531            False   \n",
       "39   https://www.liepin.com/lptjob/61666977  61666977            False   \n",
       "\n",
       "    job.dq               job.title job.refreshTime  job.topJob  ...  \\\n",
       "0       深圳                    广告策划  20230916093155       False  ...   \n",
       "1   深圳-福田区                    广告策划  20231102191952       False  ...   \n",
       "2       深圳                    广告策划  20230919093804       False  ...   \n",
       "3   深圳-福田区                    广告策划  20230915160702       False  ...   \n",
       "4       深圳                    广告策划  20231014200408       False  ...   \n",
       "..     ...                     ...             ...         ...  ...   \n",
       "35  深圳-南山区         海外市场实习生(J11029)  20231031114858       False  ...   \n",
       "36  深圳-南山区         海外市场实习生(J11025)  20231027151058       False  ...   \n",
       "37  深圳-南山区         海外市场实习生(J11025)  20231024163614       False  ...   \n",
       "38      深圳                数字化市场运营岗  20230918224323       False  ...   \n",
       "39  深圳-坪山区  品牌岗（新媒体及文案方向） (24届应届生)  20231113143544       False  ...   \n",
       "\n",
       "   comp.compScale                                 comp.link comp.compName  \\\n",
       "0        100-499人                                            某深圳云计算/大数据公司   \n",
       "1           1-49人                                                某深圳互联网公司   \n",
       "2          50-99人                                               某深圳咨询服务公司   \n",
       "3          50-99人                                               某深圳咨询服务公司   \n",
       "4          50-99人                                               某深圳咨询服务公司   \n",
       "..            ...                                       ...           ...   \n",
       "35     1000-2000人   https://www.liepin.com/company/9322029/          石头科技   \n",
       "36     1000-2000人   https://www.liepin.com/company/9322029/          石头科技   \n",
       "37     1000-2000人   https://www.liepin.com/company/9322029/          石头科技   \n",
       "38       10000人以上   https://www.liepin.com/company/2034027/    中国联通广东省分公司   \n",
       "39                 https://www.liepin.com/company/13261319/       深圳市华朗学校   \n",
       "\n",
       "   comp.compIndustry comp.compStage                    comp.compLogo  \\\n",
       "0            云计算/大数据          融资未公开  60e2fe0bf3df194a3c48adb502u.png   \n",
       "1                互联网          不需要融资  60e2fe0bf3df194a3c48adb502u.png   \n",
       "2               咨询服务            NaN  60e2fe0bf3df194a3c48adb502u.png   \n",
       "3               咨询服务            NaN  60e2fe0bf3df194a3c48adb502u.png   \n",
       "4               咨询服务            NaN  60e2fe0bf3df194a3c48adb502u.png   \n",
       "..               ...            ...                              ...   \n",
       "35              智能硬件         沪深A股上市  5d06ef909fae687d4390e99b07u.png   \n",
       "36              智能硬件         沪深A股上市  5d06ef909fae687d4390e99b07u.png   \n",
       "37              智能硬件         沪深A股上市  5d06ef909fae687d4390e99b07u.png   \n",
       "38          运营商/增值服务            NaN  5bfea5f974719d2aa34cef7003a.png   \n",
       "39              学校教育            NaN  63b40735895ec0179bd60f2507u.png   \n",
       "\n",
       "                                      job.pcOuterLink  \\\n",
       "0                                                 NaN   \n",
       "1                                                 NaN   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                 NaN   \n",
       "..                                                ...   \n",
       "35                                                      \n",
       "36                                                      \n",
       "37                                                      \n",
       "38  https://atsc.liepin.com/ats/apply-form/?jobId=...   \n",
       "39                                                      \n",
       "\n",
       "                                      job.h5OuterLink comp.compId  \\\n",
       "0                                                 NaN         NaN   \n",
       "1                                                 NaN         NaN   \n",
       "2                                                 NaN         NaN   \n",
       "3                                                 NaN         NaN   \n",
       "4                                                 NaN         NaN   \n",
       "..                                                ...         ...   \n",
       "35                                                      9322029.0   \n",
       "36                                                      9322029.0   \n",
       "37                                                      9322029.0   \n",
       "38  https://matsc.liepin.com/ats/apply-form/?jobId...   2034027.0   \n",
       "39                                                     13261319.0   \n",
       "\n",
       "   job.campusJobKind  \n",
       "0                NaN  \n",
       "1                NaN  \n",
       "2                NaN  \n",
       "3                NaN  \n",
       "4                NaN  \n",
       "..               ...  \n",
       "35                实习  \n",
       "36                实习  \n",
       "37                实习  \n",
       "38                应届  \n",
       "39                应届  \n",
       "\n",
       "[400 rows x 32 columns]>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "广告策划_深圳_results.to_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['dataInfo', 'dataParams', 'job.labels', 'job.link', 'job.jobId',\n",
       "       'job.advViewFlag', 'job.dq', 'job.title', 'job.refreshTime',\n",
       "       'job.topJob', 'job.jobKind', 'job.requireEduLevel', 'job.salary',\n",
       "       'job.requireWorkYears', 'job.dataPromId', 'recruiter.imId',\n",
       "       'recruiter.imUserType', 'recruiter.chatted', 'recruiter.recruiterName',\n",
       "       'recruiter.recruiterTitle', 'recruiter.recruiterId',\n",
       "       'recruiter.recruiterPhoto', 'comp.compScale', 'comp.link',\n",
       "       'comp.compName', 'comp.compIndustry', 'comp.compStage', 'comp.compLogo',\n",
       "       'job.pcOuterLink', 'job.h5OuterLink', 'comp.compId',\n",
       "       'job.campusJobKind'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "广告策划_深圳_results.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "深圳        141\n",
       "深圳-南山区     70\n",
       "深圳-福田区     57\n",
       "深圳-宝安区     45\n",
       "深圳-龙岗区     39\n",
       "深圳-龙华区     23\n",
       "深圳-罗湖区     15\n",
       "深圳-坪山区      5\n",
       "深圳-光明区      4\n",
       "深圳-盐田区      1\n",
       "Name: job.dq, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_dq = 广告策划_深圳_results['job.dq'].value_counts()\n",
    "series_dq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['深圳-南山区',\n",
       " '深圳-福田区',\n",
       " '深圳-宝安区',\n",
       " '深圳-龙岗区',\n",
       " '深圳-龙华区',\n",
       " '深圳-罗湖区',\n",
       " '深圳-坪山区',\n",
       " '深圳-光明区',\n",
       " '深圳-盐田区']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dq_name= [i for i in series_dq.index if '-' in i]\n",
    "dq_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([70, 57, 45, 39, 23, 15,  5,  4,  1], dtype=int64)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_dq[dq_name].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Requirement already satisfied: pyecharts in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (2.0.3)\n",
      "Requirement already satisfied: simplejson in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from pyecharts) (3.19.1)\n",
      "Requirement already satisfied: jinja2 in d:\\anaconda\\lib\\site-packages (from pyecharts) (2.11.3)\n",
      "Requirement already satisfied: prettytable in c:\\users\\jenny\\appdata\\roaming\\python\\python39\\site-packages (from pyecharts) (3.7.0)\n",
      "Requirement already satisfied: MarkupSafe>=0.23 in d:\\anaconda\\lib\\site-packages (from jinja2->pyecharts) (2.0.1)\n",
      "Requirement already satisfied: wcwidth in d:\\anaconda\\lib\\site-packages (from prettytable->pyecharts) (0.2.5)\n"
     ]
    }
   ],
   "source": [
    "!pip install pyecharts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Map\n",
    "from pyecharts.faker import Faker\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['南山区', '福田区', '宝安区', '龙岗区', '龙华区', '罗湖区', '坪山区', '光明区', '盐田区']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[i.split('-')[1] for i in dq_name]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[70, 57, 45, 39, 23, 15, 5, 4, 1]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_dq[dq_name].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<script>\n",
       "    require.config({\n",
       "        paths: {\n",
       "            'echarts':'https://assets.pyecharts.org/assets/v5/echarts.min', '深圳':'https://assets.pyecharts.org/assets/v5/maps/guang3_dong1_shen1_zhen4'\n",
       "        }\n",
       "    });\n",
       "</script>\n",
       "\n",
       "        <div id=\"486ea39f0686412b96abbdeb09d8337d\" style=\"width:900px; height:500px;\"></div>\n",
       "\n",
       "<script>\n",
       "        require(['echarts', '深圳'], function(echarts) {\n",
       "                var chart_486ea39f0686412b96abbdeb09d8337d = echarts.init(\n",
       "                    document.getElementById('486ea39f0686412b96abbdeb09d8337d'), 'white', {renderer: 'canvas'});\n",
       "                var option_486ea39f0686412b96abbdeb09d8337d = {\n",
       "    \"animation\": true,\n",
       "    \"animationThreshold\": 2000,\n",
       "    \"animationDuration\": 1000,\n",
       "    \"animationEasing\": \"cubicOut\",\n",
       "    \"animationDelay\": 0,\n",
       "    \"animationDurationUpdate\": 300,\n",
       "    \"animationEasingUpdate\": \"cubicOut\",\n",
       "    \"animationDelayUpdate\": 0,\n",
       "    \"aria\": {\n",
       "        \"enabled\": false\n",
       "    },\n",
       "    \"color\": [\n",
       "        \"#5470c6\",\n",
       "        \"#91cc75\",\n",
       "        \"#fac858\",\n",
       "        \"#ee6666\",\n",
       "        \"#73c0de\",\n",
       "        \"#3ba272\",\n",
       "        \"#fc8452\",\n",
       "        \"#9a60b4\",\n",
       "        \"#ea7ccc\"\n",
       "    ],\n",
       "    \"series\": [\n",
       "        {\n",
       "            \"type\": \"map\",\n",
       "            \"name\": \"\\u5e7f\\u544a\\u7b56\\u5212\\u5730\\u533a\\u5206\\u5e03\",\n",
       "            \"label\": {\n",
       "                \"show\": true,\n",
       "                \"margin\": 8\n",
       "            },\n",
       "            \"map\": \"\\u6df1\\u5733\",\n",
       "            \"data\": [\n",
       "                {\n",
       "                    \"name\": \"\\u5357\\u5c71\\u533a\",\n",
       "                    \"value\": 70\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u798f\\u7530\\u533a\",\n",
       "                    \"value\": 57\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5b9d\\u5b89\\u533a\",\n",
       "                    \"value\": 45\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9f99\\u5c97\\u533a\",\n",
       "                    \"value\": 39\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9f99\\u534e\\u533a\",\n",
       "                    \"value\": 23\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u7f57\\u6e56\\u533a\",\n",
       "                    \"value\": 15\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u576a\\u5c71\\u533a\",\n",
       "                    \"value\": 5\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5149\\u660e\\u533a\",\n",
       "                    \"value\": 4\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u76d0\\u7530\\u533a\",\n",
       "                    \"value\": 1\n",
       "                }\n",
       "            ],\n",
       "            \"roam\": true,\n",
       "            \"aspectScale\": 0.75,\n",
       "            \"nameProperty\": \"name\",\n",
       "            \"selectedMode\": false,\n",
       "            \"zoom\": 1,\n",
       "            \"zlevel\": 0,\n",
       "            \"z\": 2,\n",
       "            \"seriesLayoutBy\": \"column\",\n",
       "            \"datasetIndex\": 0,\n",
       "            \"mapValueCalculation\": \"sum\",\n",
       "            \"showLegendSymbol\": true,\n",
       "            \"emphasis\": {}\n",
       "        }\n",
       "    ],\n",
       "    \"legend\": [\n",
       "        {\n",
       "            \"data\": [\n",
       "                \"\\u5e7f\\u544a\\u7b56\\u5212\\u5730\\u533a\\u5206\\u5e03\"\n",
       "            ],\n",
       "            \"selected\": {},\n",
       "            \"show\": true,\n",
       "            \"padding\": 5,\n",
       "            \"itemGap\": 10,\n",
       "            \"itemWidth\": 25,\n",
       "            \"itemHeight\": 14,\n",
       "            \"backgroundColor\": \"transparent\",\n",
       "            \"borderColor\": \"#ccc\",\n",
       "            \"borderWidth\": 1,\n",
       "            \"borderRadius\": 0,\n",
       "            \"pageButtonItemGap\": 5,\n",
       "            \"pageButtonPosition\": \"end\",\n",
       "            \"pageFormatter\": \"{current}/{total}\",\n",
       "            \"pageIconColor\": \"#2f4554\",\n",
       "            \"pageIconInactiveColor\": \"#aaa\",\n",
       "            \"pageIconSize\": 15,\n",
       "            \"animationDurationUpdate\": 800,\n",
       "            \"selector\": false,\n",
       "            \"selectorPosition\": \"auto\",\n",
       "            \"selectorItemGap\": 7,\n",
       "            \"selectorButtonGap\": 10\n",
       "        }\n",
       "    ],\n",
       "    \"tooltip\": {\n",
       "        \"show\": true,\n",
       "        \"trigger\": \"item\",\n",
       "        \"triggerOn\": \"mousemove|click\",\n",
       "        \"axisPointer\": {\n",
       "            \"type\": \"line\"\n",
       "        },\n",
       "        \"showContent\": true,\n",
       "        \"alwaysShowContent\": false,\n",
       "        \"showDelay\": 0,\n",
       "        \"hideDelay\": 100,\n",
       "        \"enterable\": false,\n",
       "        \"confine\": false,\n",
       "        \"appendToBody\": false,\n",
       "        \"transitionDuration\": 0.4,\n",
       "        \"textStyle\": {\n",
       "            \"fontSize\": 14\n",
       "        },\n",
       "        \"borderWidth\": 0,\n",
       "        \"padding\": 5,\n",
       "        \"order\": \"seriesAsc\"\n",
       "    },\n",
       "    \"title\": [\n",
       "        {\n",
       "            \"show\": true,\n",
       "            \"text\": \"Map-\\u6df1\\u5733-\\u5e7f\\u544a\\u7b56\\u5212\",\n",
       "            \"target\": \"blank\",\n",
       "            \"subtarget\": \"blank\",\n",
       "            \"padding\": 5,\n",
       "            \"itemGap\": 10,\n",
       "            \"textAlign\": \"auto\",\n",
       "            \"textVerticalAlign\": \"auto\",\n",
       "            \"triggerEvent\": false\n",
       "        }\n",
       "    ],\n",
       "    \"visualMap\": {\n",
       "        \"show\": true,\n",
       "        \"type\": \"continuous\",\n",
       "        \"min\": 0,\n",
       "        \"max\": 100,\n",
       "        \"inRange\": {\n",
       "            \"color\": [\n",
       "                \"#50a3ba\",\n",
       "                \"#eac763\",\n",
       "                \"#d94e5d\"\n",
       "            ]\n",
       "        },\n",
       "        \"calculable\": true,\n",
       "        \"inverse\": false,\n",
       "        \"splitNumber\": 5,\n",
       "        \"hoverLink\": true,\n",
       "        \"orient\": \"vertical\",\n",
       "        \"padding\": 5,\n",
       "        \"showLabel\": true,\n",
       "        \"itemWidth\": 20,\n",
       "        \"itemHeight\": 140,\n",
       "        \"borderWidth\": 0\n",
       "    }\n",
       "};\n",
       "                chart_486ea39f0686412b96abbdeb09d8337d.setOption(option_486ea39f0686412b96abbdeb09d8337d);\n",
       "        });\n",
       "    </script>\n"
      ],
      "text/plain": [
       "<pyecharts.render.display.HTML at 0x2ef192cbca0>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Map\n",
    "from pyecharts.faker import Faker\n",
    "\n",
    "c = (\n",
    "    Map()\n",
    "    .add(\"广告策划地区分布\", [list(z) for z in zip([i.split('-')[1] for i in dq_name],series_dq[dq_name].values.tolist())], \"深圳\")\n",
    "    .set_global_opts(\n",
    "        title_opts=opts.TitleOpts(title=\"Map-深圳-广告策划\"), visualmap_opts=opts.VisualMapOpts()\n",
    "    )\n",
    "    \n",
    ")\n",
    "c.render_notebook()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录cookie inited_user=daf7251f92024e8969feb28b0e9ad34c; XSRF-TOKEN=SIVa3Y_gRaqR7k-mI1I6kQ; __gc_id=d474cd1529ae40a29b8f9c81b7e60de7; _ga=GA1.1.1301373590.1698838360; __uuid=1698838362686.73; __tlog=1698838362732.34%7C00000000%7C00000000%7C00000000%7C00000000; acw_tc=276077be16988383924452585e8d8e297846bb9fcc8c8f34695b420d154b08; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1698838365; UniqueKey=95507c72a8d5ae141a667e00ad0d9493; liepin_login_valid=0; lt_auth=u%2B5bbHQGxlzxtXfR3zQN4vociI39UWvIpX8EhE0Ahoe%2BCqG04PngSwOGq7EExAMhlkh1ccULN7n2Pev2zXtP4kcTwGqnl4CyvOW92GECTeNcN8W2vezHl8zRQpQcl0AC8nFbtkIL%2BQ%3D%3D; access_system=C; user_roles=0; user_photo=5f8fa3a679c7cc70efbf444e08u.png; user_name=%E8%AE%B8%E6%99%BA%E8%B6%85; need_bind_tel=false; new_user=false; c_flag=fa43f4d55f3df63a96a7b4f194e214d4; inited_user=daf7251f92024e8969feb28b0e9ad34c; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1698838461; imId=c5f9b89f8466dffe6882ca1e5431db9c; imId_0=c5f9b89f8466dffe6882ca1e5431db9c; imClientId=c5f9b89f8466dffeb1921abcfab3aed0; imClientId_0=c5f9b89f8466dffeb1921abcfab3aed0; imApp_0=1; __session_seq=7; __uv_seq=7; fe_im_socketSequence_new_0=1_1_1; __tlg_event_seq=116; fe_im_opened_pages=; fe_im_connectJson_0=%7B%220_95507c72a8d5ae141a667e00ad0d9493%22%3A%7B%22socketConnect%22%3A%222%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; _ga_54YTJKWN86=GS1.1.1698838360.1.1.1698838528.0.0.0\n",
      "这是第1页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待6秒...然后以继续抓取\n"
     ]
    }
   ],
   "source": [
    "import crawl_liepin  \n",
    "  \n",
    "广告策划_深圳_results = crawl_liepin.crawl(城市=\"深圳\",关键词=\"广告策划\",学历='本科',工作经验='',登录cookie=cookie)  \n",
    "  \n",
    "# 将DataFrame保存为Excel文件  \n",
    "广告策划_深圳_results.to_excel(\"广告策划_深圳_results.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
